setwd("le répertoire de mes données")
#knitr::opts_knit$set(root.dir = "~/Documents/GitHub/m1act_Spring21")
datadir <- "~/Documents/GitHub/m1act_Spring21/data"
exportdir <- "~/Documents/GitHub/m1act_Spring21/export"
graphdir <- "~/Documents/GitHub/m1act_Spring21/graph"
library(faraway)
aaa=rnorm(100)
bbb=rpois(100,1)
save(aaa,bbb,file=paste(exportdir,"simulations.RData",sep="/"))
rm(aaa,bbb)
try(aaa)
## Error in try(aaa) : objet 'aaa' introuvable
try(bbb)
## Error in try(bbb) : objet 'bbb' introuvable
load(paste(exportdir,"simulations.RData",sep="/"))
aaa
## [1] -0.79792037 -1.35205511 -0.46179963 -0.68983088 -0.13872252 0.18047802
## [7] -0.10756321 -1.18750873 -0.07161173 -0.02386442 1.08527104 0.76773347
## [13] 0.76536340 -0.01015083 0.64901117 -1.20631499 -0.38339427 -0.17775690
## [19] -0.26530133 -0.62058024 -0.92450352 0.71042069 0.56955588 0.37994659
## [25] 0.06844525 2.33159470 1.08812293 -0.42940056 -0.47727741 -0.09170656
## [31] 0.48963019 -1.26263619 1.38309821 -0.30826147 0.59543564 -0.19066936
## [37] 0.73921247 0.06444115 1.11053499 1.67359603 1.41414694 0.29158559
## [43] 0.40198130 1.51127338 0.35719992 0.15052745 0.15265481 -1.29622113
## [49] 2.17128280 -1.92457874 -0.40288378 -0.88704645 0.60709760 0.38807484
## [55] 0.07504905 -0.06725234 0.26877589 -0.16980368 0.15208394 0.08906470
## [61] -0.09167668 0.33553181 -1.14833223 -0.52951993 1.04243243 -0.49744890
## [67] 0.02898057 -0.34079127 -1.29743458 0.81369288 0.80853853 0.12815465
## [73] 1.09280360 -0.05303212 0.74557702 -0.30766340 -0.19869087 0.65443750
## [79] -0.88219742 1.17845145 -0.02520588 0.80981895 -0.10633053 0.13901020
## [85] 0.15443569 -0.09941498 0.98425220 2.15821178 0.30634940 0.36420878
## [91] -0.17358829 -0.77965566 0.52828412 1.01428117 -1.05588486 1.10118274
## [97] -2.18495551 2.09507661 1.56248767 -1.98624472
bbb
## [1] 3 0 2 0 2 1 2 1 3 2 1 1 0 0 1 1 3 1 1 2 1 2 0 0 1 1 1 2 0 0 1 2 0 1 1 1 0
## [38] 1 0 2 1 1 0 1 2 1 1 2 3 3 1 1 1 4 2 0 0 0 0 2 1 0 0 2 1 0 1 0 2 1 0 0 0 0
## [75] 1 0 0 0 0 0 0 0 1 1 3 1 0 0 1 2 0 1 1 2 1 1 1 1 1 1
load(paste(datadir,"vulnerability.RData",sep="/"))
#attach(vul)
#country_name
#detach(vul)
with(vul, country_name)
## [1] Albania Algeria
## [3] Angola Argentina
## [5] Armenia Australia
## [7] Austria Azerbaijan
## [9] Bahamas Bangladesh
## [11] Belarus Belgium
## [13] Belize Benin
## [15] Bhutan Bolivia
## [17] Bosnia-Hercegovenia Botswana
## [19] Brazil Bulgaria
## [21] Burkina Faso Burundi
## [23] Cambodia Cameroon
## [25] Canada Central African Rep
## [27] Chad Chile
## [29] China P Rep Colombia
## [31] Congo Costa Rica
## [33] Cote d'Ivoire Croatia
## [35] Cuba Czech Rep
## [37] Dominican Rep Ecuador
## [39] Egypt El Salvador
## [41] Eritrea Ethiopia
## [43] Fiji France
## [45] Gambia The Georgia
## [47] Germany Ghana
## [49] Greece Grenada
## [51] Guatemala Guinea
## [53] Guinea Bissau Guyana
## [55] Haiti Honduras
## [57] Hong Kong (China) Hungary
## [59] India Indonesia
## [61] Iran Islam Rep Ireland
## [63] Israel Italy
## [65] Jamaica Japan
## [67] Jordan Kazakhstan
## [69] Kenya Korea Rep
## [71] Kyrgyzstan Lao P Dem Rep
## [73] Lebanon Lesotho
## [75] Lithuania Macedonia FRY
## [77] Madagascar Malawi
## [79] Malaysia Mali
## [81] Mauritania Mauritius
## [83] Mexico Moldova Rep
## [85] Mongolia Morocco
## [87] Mozambique Myanmar
## [89] Namibia Nepal
## [91] Netherlands New Zealand
## [93] Nicaragua Niger
## [95] Nigeria Norway
## [97] Oman Pakistan
## [99] Panama Papua New Guinea
## [101] Paraguay Peru
## [103] Philippines Poland
## [105] Portugal Romania
## [107] Russia Rwanda
## [109] Samoa Saudi Arabia
## [111] Senegal Sierra Leone
## [113] Slovakia South Africa
## [115] Spain Sri Lanka
## [117] St Lucia St Vincent and The Grenadines
## [119] Sudan Suriname
## [121] Swaziland Switzerland
## [123] Syrian Arab Rep Tajikistan
## [125] Tanzania Uni Rep Thailand
## [127] Timor-Leste Togo
## [129] Tonga Trinidad and Tobago
## [131] Tunisia Turkey
## [133] Uganda Ukraine
## [135] United Kingdom United States
## [137] Uruguay Vanuatu
## [139] Venezuela Viet Nam
## [141] Yemen Zaire/Congo Dem Rep
## [143] Zambia Zimbabwe
## 144 Levels: Albania Algeria Angola Argentina Armenia Australia ... Zimbabwe
summary(vul)
## country_name ln_urb ln_events ln_fert
## Albania : 1 Min. :2.182 Min. :0.6931 Min. :0.3716
## Algeria : 1 1st Qu.:3.421 1st Qu.:2.0794 1st Qu.:0.9507
## Angola : 1 Median :3.911 Median :2.8332 Median :1.4585
## Argentina: 1 Mean :3.773 Mean :2.7752 Mean :1.3283
## Armenia : 1 3rd Qu.:4.190 3rd Qu.:3.3758 3rd Qu.:1.7047
## Australia: 1 Max. :4.588 Max. :5.9480 Max. :2.0477
## (Other) :138
## hdi ln_pop ln_death_risk death_risk
## Min. :0.3350 Min. : 0.5878 Min. :-4.3920 Min. : 0.01238
## 1st Qu.:0.5321 1st Qu.: 4.2487 1st Qu.:-1.3588 1st Qu.: 0.25697
## Median :0.7360 Median : 5.2097 Median :-0.3045 Median : 0.73753
## Mean :0.6887 Mean : 5.1372 Mean :-0.1689 Mean : 5.62025
## 3rd Qu.:0.8034 3rd Qu.: 6.2394 3rd Qu.: 0.7831 3rd Qu.: 2.18859
## Max. :0.9530 Max. :10.0206 Max. : 5.1184 Max. :167.07002
##
with(vul,pairs(~ln_death_risk+ln_events+ln_fert+hdi+ln_pop, main="Simple Scatterplot Matrix"))
if(!("GGally" %in% rownames(installed.packages()))){install.packages("GGally")}
library(GGally)
## Warning: package 'ggplot2' was built under R version 4.0.2
with(vul, ggpairs(vul[,c("ln_death_risk","ln_events","ln_fert","hdi","ln_pop")]))
fit_univ = lm(ln_death_risk~ln_events,data=vul)
print(fit_univ)
##
## Call:
## lm(formula = ln_death_risk ~ ln_events, data = vul)
##
## Coefficients:
## (Intercept) ln_events
## -1.6048 0.5174
str(fit_univ)
## List of 12
## $ coefficients : Named num [1:2] -1.605 0.517
## ..- attr(*, "names")= chr [1:2] "(Intercept)" "ln_events"
## $ residuals : Named num [1:144] -0.297 0.692 0.254 -1.381 -1.48 ...
## ..- attr(*, "names")= chr [1:144] "1" "2" "3" "4" ...
## $ effects : Named num [1:144] 2.03 -6.06 0.26 -1.41 -1.38 ...
## ..- attr(*, "names")= chr [1:144] "(Intercept)" "ln_events" "" "" ...
## $ rank : int 2
## $ fitted.values: Named num [1:144] -0.4135 0.2042 -0.0296 0.2772 -0.8875 ...
## ..- attr(*, "names")= chr [1:144] "1" "2" "3" "4" ...
## $ assign : int [1:2] 0 1
## $ qr :List of 5
## ..$ qr : num [1:144, 1:2] -12 0.0833 0.0833 0.0833 0.0833 ...
## .. ..- attr(*, "dimnames")=List of 2
## .. .. ..$ : chr [1:144] "1" "2" "3" "4" ...
## .. .. ..$ : chr [1:2] "(Intercept)" "ln_events"
## .. ..- attr(*, "assign")= int [1:2] 0 1
## ..$ qraux: num [1:2] 1.08 1.06
## ..$ pivot: int [1:2] 1 2
## ..$ tol : num 1e-07
## ..$ rank : int 2
## ..- attr(*, "class")= chr "qr"
## $ df.residual : int 142
## $ xlevels : Named list()
## $ call : language lm(formula = ln_death_risk ~ ln_events, data = vul)
## $ terms :Classes 'terms', 'formula' language ln_death_risk ~ ln_events
## .. ..- attr(*, "variables")= language list(ln_death_risk, ln_events)
## .. ..- attr(*, "factors")= int [1:2, 1] 0 1
## .. .. ..- attr(*, "dimnames")=List of 2
## .. .. .. ..$ : chr [1:2] "ln_death_risk" "ln_events"
## .. .. .. ..$ : chr "ln_events"
## .. ..- attr(*, "term.labels")= chr "ln_events"
## .. ..- attr(*, "order")= int 1
## .. ..- attr(*, "intercept")= int 1
## .. ..- attr(*, "response")= int 1
## .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv>
## .. ..- attr(*, "predvars")= language list(ln_death_risk, ln_events)
## .. ..- attr(*, "dataClasses")= Named chr [1:2] "numeric" "numeric"
## .. .. ..- attr(*, "names")= chr [1:2] "ln_death_risk" "ln_events"
## $ model :'data.frame': 144 obs. of 2 variables:
## ..$ ln_death_risk: num [1:144] -0.71 0.896 0.225 -1.104 -2.367 ...
## ..$ ln_events : num [1:144] 2.3 3.5 3.04 3.64 1.39 ...
## ..- attr(*, "terms")=Classes 'terms', 'formula' language ln_death_risk ~ ln_events
## .. .. ..- attr(*, "variables")= language list(ln_death_risk, ln_events)
## .. .. ..- attr(*, "factors")= int [1:2, 1] 0 1
## .. .. .. ..- attr(*, "dimnames")=List of 2
## .. .. .. .. ..$ : chr [1:2] "ln_death_risk" "ln_events"
## .. .. .. .. ..$ : chr "ln_events"
## .. .. ..- attr(*, "term.labels")= chr "ln_events"
## .. .. ..- attr(*, "order")= int 1
## .. .. ..- attr(*, "intercept")= int 1
## .. .. ..- attr(*, "response")= int 1
## .. .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv>
## .. .. ..- attr(*, "predvars")= language list(ln_death_risk, ln_events)
## .. .. ..- attr(*, "dataClasses")= Named chr [1:2] "numeric" "numeric"
## .. .. .. ..- attr(*, "names")= chr [1:2] "ln_death_risk" "ln_events"
## - attr(*, "class")= chr "lm"
lm(ln_death_risk~ln_events,data=vul)$x
## named list()
lm.X <- lm(ln_death_risk~ln_events,data=vul,x=TRUE,y=TRUE)$x
lm.y <- lm(ln_death_risk~ln_events,data=vul,x=TRUE,y=TRUE)$y
all(zapsmall(with(vul,ln_death_risk)-lm.y))
## Warning in all(zapsmall(with(vul, ln_death_risk) - lm.y)): conversion
## automatique d'un argument de type 'double' en booléen (logical)
## [1] FALSE
head(lm.X)
## (Intercept) ln_events
## 1 1 2.302585
## 2 1 3.496508
## 3 1 3.044523
## 4 1 3.637586
## 5 1 1.386294
## 6 1 4.394449
head(lm.y)
## 1 2 3 4 5 6
## -0.7102835 0.8961845 0.2246880 -1.1036180 -2.3671240 -1.0504330
betahat=MASS::ginv(t(lm.X)%*%lm.X)%*%t(lm.X)%*%lm.y
betahat
## [,1]
## [1,] -1.6047775
## [2,] 0.5173794
fit_univ$coefficients
## (Intercept) ln_events
## -1.6047775 0.5173794
(summary(fit_univ)$coefficients)[,"Std. Error"]
## (Intercept) ln_events
## 0.4220358 0.1434476
#diag(sigma^2 (X'X)^{-1})
#CMR
sigma2hat <- anova(fit_univ)["Residuals","Mean Sq"]
sigma2hat*MASS::ginv(t(lm.X)%*%lm.X)
## [,1] [,2]
## [1,] 0.17811425 -0.05710631
## [2,] -0.05710631 0.02057723
diag(sigma2hat*MASS::ginv(t(lm.X)%*%lm.X))
## [1] 0.17811425 0.02057723
((summary(fit_univ)$coefficients)[,"Std. Error"])^2
## (Intercept) ln_events
## 0.17811425 0.02057723
#sum(Yhat-Yobs)^2/(n-rg(X))
head(fit_univ$fitted.values)
## 1 2 3 4 5 6
## -0.41346753 0.20424358 -0.02960412 0.27723443 -0.88753758 0.66881972
head(fitted(fit_univ))
## 1 2 3 4 5 6
## -0.41346753 0.20424358 -0.02960412 0.27723443 -0.88753758 0.66881972
head(predict(fit_univ))
## 1 2 3 4 5 6
## -0.41346753 0.20424358 -0.02960412 0.27723443 -0.88753758 0.66881972
SCRes = sum((fitted(fit_univ)-lm.y)^2)
SCRes
## [1] 401.4299
anova(fit_univ)["Residuals","Sum Sq"]
## [1] 401.4299
SCRes/(length(fit_univ$residuals)-fit_univ$rank)
## [1] 2.826971
anova(fit_univ)["Residuals","Mean Sq"]
## [1] 2.826971
Comment * obtenir obtenir une valeur ajustée ? * calculer un IC autour de la moyenne de la régression ? * calculer un IP pour une nouvelle valeur ?
Création d’un data.frame qui contient la ou les valeurs pour lesquels nous voulons une prédictions, un IC ou un IP.
Ajuste le LM de ln_death_risk par rapport à toutes les autres variables. Attention au symbole . dans l’écriture, il complique l’utilisation de predict dans la suite.
lm(ln_death_risk~.,data=vul)
##
## Call:
## lm(formula = ln_death_risk ~ ., data = vul)
##
## Coefficients:
## (Intercept)
## -0.71028
## country_nameAlgeria
## 1.60647
## country_nameAngola
## 0.93497
## country_nameArgentina
## -0.39333
## country_nameArmenia
## -1.65684
## country_nameAustralia
## -0.34015
## country_nameAustria
## -0.69708
## country_nameAzerbaijan
## -1.47436
## country_nameBahamas
## 2.03204
## country_nameBangladesh
## 4.82158
## country_nameBelarus
## -2.50897
## country_nameBelgium
## -0.98479
## country_nameBelize
## 3.29879
## country_nameBenin
## -0.34731
## country_nameBhutan
## 3.87092
## country_nameBolivia
## 1.78249
## country_nameBosnia-Hercegovenia
## -2.14523
## country_nameBotswana
## 0.47619
## country_nameBrazil
## -0.39255
## country_nameBulgaria
## -0.25609
## country_nameBurkina Faso
## -0.32023
## country_nameBurundi
## 0.92560
## country_nameCambodia
## 2.32582
## country_nameCameroon
## -0.63194
## country_nameCanada
## -0.97599
## country_nameCentral African Rep
## -0.80996
## country_nameChad
## 1.02454
## country_nameChile
## 0.73292
## country_nameChina P Rep
## 1.12384
## country_nameColombia
## 1.13679
## country_nameCongo
## -1.14948
## country_nameCosta Rica
## 1.33603
## country_nameCote d'Ivoire
## -1.64381
## country_nameCroatia
## -3.00085
## country_nameCuba
## 0.22253
## country_nameCzech Rep
## -0.26473
## country_nameDominican Rep
## 2.85372
## country_nameEcuador
## 1.44158
## country_nameEgypt
## 0.13329
## country_nameEl Salvador
## 2.43969
## country_nameEritrea
## -2.48156
## country_nameEthiopia
## 1.07948
## country_nameFiji
## 2.83539
## country_nameFrance
## -0.31220
## country_nameGambia The
## 1.55045
## country_nameGeorgia
## -1.50327
## country_nameGermany
## -1.12248
## country_nameGhana
## 0.42534
## country_nameGreece
## -0.65955
## country_nameGrenada
## 3.81138
## country_nameGuatemala
## 3.00251
## country_nameGuinea
## -1.04987
## country_nameGuinea Bissau
## -1.44148
## country_nameGuyana
## 1.70295
## country_nameHaiti
## 4.57122
## country_nameHonduras
## 5.64852
## country_nameHong Kong (China)
## 0.75312
## country_nameHungary
## -0.32554
## country_nameIndia
## 1.53890
## country_nameIndonesia
## 0.78556
## country_nameIran Islam Rep
## 1.29907
## country_nameIreland
## -0.65920
## country_nameIsrael
## -0.85106
## country_nameItaly
## -1.03665
## country_nameJamaica
## 1.12436
## country_nameJapan
## -0.02259
## country_nameJordan
## -0.49753
## country_nameKazakhstan
## -0.07571
## country_nameKenya
## 1.22299
## country_nameKorea Rep
## 1.43912
## country_nameKyrgyzstan
## -1.67924
## country_nameLao P Dem Rep
## 0.90221
## country_nameLebanon
## -0.28543
## country_nameLesotho
## 0.34927
## country_nameLithuania
## -1.31896
## country_nameMacedonia FRY
## -1.74645
## country_nameMadagascar
## 2.39586
## country_nameMalawi
## 2.34095
## country_nameMalaysia
## 0.71044
## country_nameMali
## -0.76567
## country_nameMauritania
## 0.72398
## country_nameMauritius
## -0.01242
## country_nameMexico
## 1.06316
## country_nameMoldova Rep
## 0.59083
## country_nameMongolia
## 1.97763
## country_nameMorocco
## 1.43654
## country_nameMozambique
## 2.11208
## country_nameMyanmar
## 5.82870
## country_nameNamibia
## 0.69992
## country_nameNepal
## 2.74827
## country_nameNetherlands
## -1.38592
## country_nameNew Zealand
## -1.24227
## country_nameNicaragua
## 4.42764
## country_nameNiger
## -0.01850
## country_nameNigeria
## -0.41541
## country_nameNorway
## -3.68169
## country_nameOman
## 1.75716
## country_namePakistan
## 1.83876
## country_namePanama
## 0.95226
## country_namePapua New Guinea
## 1.75926
## country_nameParaguay
## 0.86582
## country_namePeru
## 1.52230
## country_namePhilippines
## 3.21954
## country_namePoland
## -0.95817
## country_namePortugal
## -0.55522
## country_nameRomania
## 0.83515
## country_nameRussia
## -0.51183
## country_nameRwanda
## 0.41185
## country_nameSamoa
## 2.86334
## country_nameSaudi Arabia
## -0.60936
## country_nameSenegal
## 0.91395
## country_nameSierra Leone
## 0.19639
## country_nameSlovakia
## 0.28495
## country_nameSouth Africa
## 0.25514
## country_nameSpain
## -1.09686
## country_nameSri Lanka
## 1.03989
## country_nameSt Lucia
## 2.66201
## country_nameSt Vincent and The Grenadines
## 2.06841
## country_nameSudan
## 0.66015
## country_nameSuriname
## 0.16024
## country_nameSwaziland
## -2.16891
## country_nameSwitzerland
## -0.71967
## country_nameSyrian Arab Rep
## -1.36322
## country_nameTajikistan
## 3.33479
## country_nameTanzania Uni Rep
## 0.59007
## country_nameThailand
## 1.22816
## country_nameTimor-Leste
## -2.20568
## country_nameTogo
## -0.09997
## country_nameTonga
## 0.12250
## country_nameTrinidad and Tobago
## -0.60702
## country_nameTunisia
## -0.23412
## country_nameTurkey
## -0.17785
## country_nameUganda
## 0.76894
## country_nameUkraine
## -1.95883
## country_nameUnited Kingdom
## -0.64498
## country_nameUnited States
## 0.78660
## country_nameUruguay
## -0.70649
## country_nameVanuatu
## 3.26418
## country_nameVenezuela
## 4.95600
## country_nameViet Nam
## 2.68397
## country_nameYemen
## 1.48377
## country_nameZaire/Congo Dem Rep
## -0.60716
## country_nameZambia
## -1.73928
## country_nameZimbabwe
## 0.39979
## ln_urb
## NA
## ln_events
## NA
## ln_fert
## NA
## hdi
## NA
## ln_pop
## NA
## death_risk
## NA
newdata=data.frame(ln_events=3.4)
newdata
## ln_events
## 1 3.4
pred=predict(fit_univ,newdata,interval="prediction")
pred
## fit lwr upr
## 1 0.1543123 -3.185642 3.494266
sum(fit_univ$coefficients*c(1,3.4))
## [1] 0.1543123
ic=predict(fit_univ,interval="confidence")
print(ic[1:5,])
## fit lwr upr
## 1 -0.41346753 -0.72116718 -0.1057679
## 2 0.20424358 -0.14006908 0.5485563
## 3 -0.02960412 -0.31691647 0.2577082
## 4 0.27723443 -0.09224715 0.6467160
## 5 -0.88753758 -1.36903423 -0.4060409
if(!("HH" %in% rownames(installed.packages()))){install.packages("HH")}
library('HH')
HH::ci.plot(fit_univ)
fit = lm(ln_death_risk~ln_events+ln_fert+hdi+ln_pop, data=vul)
summary(fit)
##
## Call:
## lm(formula = ln_death_risk ~ ln_events + ln_fert + hdi + ln_pop,
## data = vul)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4518 -0.7673 -0.1513 0.5669 6.2271
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.3485 1.5175 -3.524 0.000575 ***
## ln_events 1.3708 0.1792 7.649 3.04e-12 ***
## ln_fert 2.1961 0.4614 4.760 4.81e-06 ***
## hdi 1.9922 1.2628 1.578 0.116928
## ln_pop -0.5672 0.1026 -5.529 1.54e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.35 on 139 degrees of freedom
## Multiple R-squared: 0.4221, Adjusted R-squared: 0.4055
## F-statistic: 25.38 on 4 and 139 DF, p-value: 8.522e-16
anova(fit)
## Analysis of Variance Table
##
## Response: ln_death_risk
## Df Sum Sq Mean Sq F value Pr(>F)
## ln_events 1 36.775 36.775 20.186 1.466e-05 ***
## ln_fert 1 71.336 71.336 39.156 4.542e-09 ***
## hdi 1 21.154 21.154 11.611 0.0008576 ***
## ln_pop 1 55.703 55.703 30.575 1.540e-07 ***
## Residuals 139 253.237 1.822
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coef(fit)
## (Intercept) ln_events ln_fert hdi ln_pop
## -5.3484985 1.3708219 2.1960509 1.9921835 -0.5672164
head(residuals(fit))
## 1 2 3 4 5 6
## -0.4655284 0.1040698 -0.6106572 -0.9839096 0.1853458 -1.9841708
X = vul[,c("ln_events","ln_fert","hdi","ln_pop")]
all(X==vul[,c(3:6)])
## [1] TRUE
cor_mat = cor(X)
cov_mat = cov(X)
propres = eigen(cor_mat)
propres$values
## [1] 1.9963221 1.5984915 0.2715593 0.1336271
propres$values[1] / propres$values
## [1] 1.000000 1.248879 7.351330 14.939503
propres$values[1] / propres$values[4]
## [1] 14.9395
max(propres$values[1] / propres$values)
## [1] 14.9395
library(HH,quietly = TRUE)
vif(fit)
## ln_events ln_fert hdi ln_pop
## 2.421759 3.642415 3.767663 2.460624
Les VIF sont tous inférieurs à 5, donc pas de problème de colinéarité.
Calcul des VIFs directement à partir des formules.
R2.1 = summary(lm(ln_events~ln_fert+hdi+ln_pop, data=vul))$r.squared
(VIF1 <- 1/(1 - R2.1))
## [1] 2.421759
R2.2 = summary(lm(ln_fert~ln_events+hdi+ln_pop, data=vul))$r.squared
(VIF2 <- 1/(1 - R2.2))
## [1] 3.642415
R2.3 = summary(lm(hdi~ln_events+ln_fert+ln_pop, data=vul))$r.squared
(VIF3 <- 1/(1 - R2.3))
## [1] 3.767663
R2.4 = summary(lm(ln_pop~ln_events+ln_fert+hdi, data=vul))$r.squared
(VIF4 <- 1/(1 - R2.4))
## [1] 2.460624
yhat = fitted(fit)
#fit$fitted.values
e = residuals(fit)
#fit$residuals
Vérification du calcul des résidus à partir des valeurs ajustées. Utilité de la fonction all.equal par rapport à la comparaison directe avec ==.
with(vul, all(ln_death_risk-yhat==residuals(fit)))
## [1] FALSE
with(vul, all.equal(ln_death_risk-yhat,residuals(fit)))
## [1] TRUE
e_prime = rstandard(fit)
e_star = rstudent(fit)
lattice::histogram(e_prime-e_star)
boxplot(e_prime-e_star)
#plot(fit,which=2)
shapiro.test(e)
##
## Shapiro-Wilk normality test
##
## data: e
## W = 0.94169, p-value = 1.058e-05
qqnorm(e)
qqline(e)
L’asymétrie de la distribution de la racine carré de la valeur absolue des résidus standardisés \(\sqrt{|E|}\) est beaucoup plus faible que celle de la valeur absolue des résidus standardisés \(|E|\) si les \(E\) sont distribuées suivant des lois normales centrées \(N(0,.)\). Utiliser cette racine carrée permet donc d’avoir un indicateur supplémentaire de normalité.
plot(fit,which=1)
if(!("lmtest" %in% rownames(installed.packages()))){install.packages("lmtest")}
plot(fit,which=3)
library(lmtest, quietly = TRUE)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
bptest(ln_death_risk~ln_events+ln_fert+hdi+ln_pop+I(ln_events^2)+I(ln_fert^2)+I(hdi^2)+I(ln_pop^2),data = vul)
##
## studentized Breusch-Pagan test
##
## data: ln_death_risk ~ ln_events + ln_fert + hdi + ln_pop + I(ln_events^2) + I(ln_fert^2) + I(hdi^2) + I(ln_pop^2)
## BP = 8.3579, df = 8, p-value = 0.3993
##Pas de normalité -> permutations ###Version 1 avec lmPerm
library(lmPerm)
fit_p <- lmp(ln_death_risk~ln_events+ln_fert+hdi+ln_pop, data=vul)
## [1] "Settings: unique SS : numeric variables centered"
summary(fit_p)
##
## Call:
## lmp(formula = ln_death_risk ~ ln_events + ln_fert + hdi + ln_pop,
## data = vul)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4518 -0.7673 -0.1513 0.5669 6.2271
##
## Coefficients:
## Estimate Iter Pr(Prob)
## ln_events 1.3708 5000 <2e-16 ***
## ln_fert 2.1961 5000 <2e-16 ***
## hdi 1.9922 1143 0.0805 .
## ln_pop -0.5672 5000 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.35 on 139 degrees of freedom
## Multiple R-Squared: 0.4221, Adjusted R-squared: 0.4055
## F-statistic: 25.38 on 4 and 139 DF, p-value: 8.522e-16
###Version 2 avec pgirmess
library(pgirmess)
fit_p2 <- PermTest(fit)
print(fit_p2)
##
## Monte-Carlo test
##
## Call:
## PermTest.lm(obj = fit)
##
## Based on 1000 replicates
## Simulated p-value:
## p.value
## ln_events 0.000
## ln_fert 0.000
## hdi 0.002
## ln_pop 0.000
n =50
X=rnorm(n,1)
epsilon = rnorm(n,0,0.5)
Yok = 2 +3* X+epsilon
lmok = lm(Yok~X)
Yhs = 2 +3* X+ abs(X)^2*epsilon
lmhs = lm(Yhs~X)
par(mfrow=c(1,2))
plot(lmok,which=1,main="cas ok")
plot(lmhs,which=1,main="cas heteroscedastique")
#plot(lmnl,which=1,main="cas non lineaire")
par(mfrow=c(1,2))
plot(lmok,which=3,main="cas ok")
plot(lmhs,which=3,main="cas heteroscedastique")
#plot(lmnl,which=3,main="cas non lineaire")
pairs(~ln_death_risk+ln_events+ln_fert+hdi+ln_pop, main="Simple Scatterplot Matrix", data=vul, col=(24+36*as.numeric(abs(e_star)>2)))
#Illustrer notion de levier
influences = lm.influence(fit)
hat = influences$hat
pairs(~ln_death_risk+ln_events+ln_fert+hdi+ln_pop, data=vul,
main="Simple Scatterplot Matrix",
col=(24+50*as.numeric(hat>(2*4+2) / nrow(vul))))
any((e^2-hat)/((ncol(X)-1+1)*(1-hat)^2)>4/nrow(vul))
## [1] FALSE
which((e^2-hat)/((ncol(X)-1+1)*(1-hat)^2)>4/nrow(vul))
## integer(0)
par(mfrow=c(1,2))
plot(fit, which=4:5)
dfbetas=apply(abs(influences$coefficients)/influences$sigma,1,`/`,sqrt(c(144,diag(cov_mat))))
colSums(dfbetas>2/nrow(vul))
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 1 1 1 2 2 2 2 1 2 4 1 2 2 1 3 1 2 0 2 2
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 2 2 2 1 2 3 1 1 1 0 2 1 1 2 1 2 2 2 3 2
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 3 1 1 1 1 2 1 2 2 4 2 1 3 1 4 4 1 2 1 1
## 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
## 1 2 2 0 1 0 4 3 2 2 1 1 3 1 2 2 1 1 2 3
## 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
## 1 1 2 3 1 2 2 5 2 2 1 1 3 2 2 2 4 1 2 1
## 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
## 1 2 0 2 1 2 2 1 2 2 1 2 2 2 1 1 2 1 1 2
## 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
## 2 2 3 2 1 1 3 0 4 1 2 2 1 2 1 2 2 0 4 2
## 141 142 143 144
## 0 1 1 0
n =50
X=matrix(rnorm(n*2),ncol=2)
epsilon = rnorm(n,0,0.5)
Ynl = 2 - X[,1] + 3* X[,2]^2 + epsilon
lmnl = lm(Ynl~X[,1]+X[,2])
par(mfrow=c(1,2))
plot(lmnl, which = 1:2)
par(mfrow=c(1,2))
plot(lmnl, which = 3:4)
library(car)
## Loading required package: carData
## Registered S3 methods overwritten by 'car':
## method from
## influence.merMod lme4
## cooks.distance.influence.merMod lme4
## dfbeta.influence.merMod lme4
## dfbetas.influence.merMod lme4
##
## Attaching package: 'car'
## The following objects are masked from 'package:HH':
##
## logit, vif
## The following objects are masked from 'package:faraway':
##
## logit, vif
crPlots(lmnl)
X2_2 = X[,2]^2
lmnl_2 = lm(Ynl~X[,1]+X2_2)
crPlots(lmnl_2)
n =50
X=matrix(rnorm(n*2),ncol=2)
epsilon = rnorm(n,0,0.5)
ln_Y = 1 - X[,1] + 0.1* X[,2] + epsilon
Y = exp(ln_Y)
lm_ln = lm(Y~X[,1]+X[,2])
par(mfrow=c(1,2))
plot(lm_ln, which = 1:2)
par(mfrow=c(1,2))
plot(lm_ln, which = 3:4)
crPlots(lm_ln)
lm_ln_trans = lm(log(Y)~X[,1]+X[,2])
plot(lm_ln_trans)
crPlots(lm_ln_trans)
?mtcars
mtcars_simple = mtcars[c(1,2,6)]
summary(mtcars_simple)
## mpg cyl wt
## Min. :10.40 Min. :4.000 Min. :1.513
## 1st Qu.:15.43 1st Qu.:4.000 1st Qu.:2.581
## Median :19.20 Median :6.000 Median :3.325
## Mean :20.09 Mean :6.188 Mean :3.217
## 3rd Qu.:22.80 3rd Qu.:8.000 3rd Qu.:3.610
## Max. :33.90 Max. :8.000 Max. :5.424
library("dplyr")
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following object is masked from 'package:gridExtra':
##
## combine
## The following object is masked from 'package:MASS':
##
## select
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
glimpse(mtcars_simple)
## Rows: 32
## Columns: 3
## $ mpg <dbl> 21.0, 21.0, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.8, 19.2, 17.8,…
## $ cyl <dbl> 6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 8,…
## $ wt <dbl> 2.620, 2.875, 2.320, 3.215, 3.440, 3.460, 3.570, 3.190, 3.150, 3.…
mtcars_simple<- dplyr::mutate(mtcars_simple, cyl = factor(cyl))
glimpse(mtcars_simple)
## Rows: 32
## Columns: 3
## $ mpg <dbl> 21.0, 21.0, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.8, 19.2, 17.8,…
## $ cyl <fct> 6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 8,…
## $ wt <dbl> 2.620, 2.875, 2.320, 3.215, 3.440, 3.460, 3.570, 3.190, 3.150, 3.…
fit_simple = lm(mpg~wt+factor(cyl),data=mtcars_simple)
summary(fit_simple)
##
## Call:
## lm(formula = mpg ~ wt + factor(cyl), data = mtcars_simple)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.5890 -1.2357 -0.5159 1.3845 5.7915
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 33.9908 1.8878 18.006 < 2e-16 ***
## wt -3.2056 0.7539 -4.252 0.000213 ***
## factor(cyl)6 -4.2556 1.3861 -3.070 0.004718 **
## factor(cyl)8 -6.0709 1.6523 -3.674 0.000999 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.557 on 28 degrees of freedom
## Multiple R-squared: 0.8374, Adjusted R-squared: 0.82
## F-statistic: 48.08 on 3 and 28 DF, p-value: 3.594e-11
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:latticeExtra':
##
## layer
ggplot(mtcars_simple, aes(x=wt, y=mpg, color=cyl, shape=cyl)) +
geom_point()
fit_croise = lm(mpg ~ wt * cyl, data = mtcars_simple)
summary(fit_croise)
##
## Call:
## lm(formula = mpg ~ wt * cyl, data = mtcars_simple)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.1513 -1.3798 -0.6389 1.4938 5.2523
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 39.571 3.194 12.389 2.06e-12 ***
## wt -5.647 1.359 -4.154 0.000313 ***
## cyl6 -11.162 9.355 -1.193 0.243584
## cyl8 -15.703 4.839 -3.245 0.003223 **
## wt:cyl6 2.867 3.117 0.920 0.366199
## wt:cyl8 3.455 1.627 2.123 0.043440 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.449 on 26 degrees of freedom
## Multiple R-squared: 0.8616, Adjusted R-squared: 0.8349
## F-statistic: 32.36 on 5 and 26 DF, p-value: 2.258e-10
ggplot(mtcars_simple, aes(x=wt, y=mpg, color=cyl, shape=cyl)) +
geom_point() +
geom_smooth(method=lm, se=FALSE, fullrange=TRUE)
## `geom_smooth()` using formula 'y ~ x'
ggplot(mtcars_simple, aes(x=wt, y=mpg)) +
geom_point() +
geom_smooth(method=lm, se=FALSE, fullrange=TRUE, formula = y ~ x)
summary(fit_simple)
##
## Call:
## lm(formula = mpg ~ wt + factor(cyl), data = mtcars_simple)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.5890 -1.2357 -0.5159 1.3845 5.7915
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 33.9908 1.8878 18.006 < 2e-16 ***
## wt -3.2056 0.7539 -4.252 0.000213 ***
## factor(cyl)6 -4.2556 1.3861 -3.070 0.004718 **
## factor(cyl)8 -6.0709 1.6523 -3.674 0.000999 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.557 on 28 degrees of freedom
## Multiple R-squared: 0.8374, Adjusted R-squared: 0.82
## F-statistic: 48.08 on 3 and 28 DF, p-value: 3.594e-11
summary(fit_croise)
##
## Call:
## lm(formula = mpg ~ wt * cyl, data = mtcars_simple)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.1513 -1.3798 -0.6389 1.4938 5.2523
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 39.571 3.194 12.389 2.06e-12 ***
## wt -5.647 1.359 -4.154 0.000313 ***
## cyl6 -11.162 9.355 -1.193 0.243584
## cyl8 -15.703 4.839 -3.245 0.003223 **
## wt:cyl6 2.867 3.117 0.920 0.366199
## wt:cyl8 3.455 1.627 2.123 0.043440 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.449 on 26 degrees of freedom
## Multiple R-squared: 0.8616, Adjusted R-squared: 0.8349
## F-statistic: 32.36 on 5 and 26 DF, p-value: 2.258e-10
anova(fit_simple)
## Analysis of Variance Table
##
## Response: mpg
## Df Sum Sq Mean Sq F value Pr(>F)
## wt 1 847.73 847.73 129.6650 5.079e-12 ***
## factor(cyl) 2 95.26 47.63 7.2856 0.002835 **
## Residuals 28 183.06 6.54
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(fit_croise)
## Analysis of Variance Table
##
## Response: mpg
## Df Sum Sq Mean Sq F value Pr(>F)
## wt 1 847.73 847.73 141.3883 5.133e-12 ***
## cyl 2 95.26 47.63 7.9443 0.00203 **
## wt:cyl 2 27.17 13.58 2.2658 0.12386
## Residuals 26 155.89 6.00
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(fit_simple,fit_croise)
## Analysis of Variance Table
##
## Model 1: mpg ~ wt + factor(cyl)
## Model 2: mpg ~ wt * cyl
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 28 183.06
## 2 26 155.89 2 27.17 2.2658 0.1239